%MACRO print_propensity(in);

*****************************************************************************
* Program: PRINT_PROPENSITY					   						
* 																			
* Description: Performs diagnostic checks (common support,
*				standardized differences and empirical cumulative
*				distribution functions).
*						
* Last modified: September 4, 2015												
* Created by: Martin Wegman													
*****************************************************************************;

options nodate nonumber;
ods escapechar ="^";
%let date = %sysfunc(date(),DATE7.);

%let din = weights; * From propensity.sas;

%let site = site3;
%let cddc_val = 1;

data loc.&din;
	set loc.&in;
run;

****************************************************************************;
****************************************************************************;
************************     Model variables       *************************;
****************************************************************************;
****************************************************************************;

* Characteristics before treatment is assigned;

* Continuous vars;
%let var1 = 		dm0
					ih01_sqrt
					ih02_log
					ih05_totaltimes 
					du16_log 
					du02b_heroin_years 
					ss_so ss_fam ss_fri; 

* Binary vars;
%let var2b =		dm3_m
					dm4_m
					dm5_m  
					rbd1
					daily_heroin  
					lifetime_alcohol 
					lifetime_other_opiates 
					lifetime_benzos 
					lifetime_stimulants ;

* >2 level categorical vars;
%let var2m = 		dast_cat_m 
					dm2_m;

* Vars after treatment assignment;

* Continuous vars;
%let var3 = 	 cs01_craving
					so_rec_d so_amb_d so_ts_d; 
* Categorical vars;
%let var4 = ;


****************************************************************************;
****************************************************************************;
********************      Evaluate Common Support          *****************;
****************************************************************************;
****************************************************************************;

/* Evaluate common support by comparing the distributions of propensity
scores */

/* Create var needed for paired histograms */
data loc.temp1;
	set loc.&din; 
	if &site = &cddc_val then ps_pred_cddc = ps_pred;
	else ps_pred_vtc = ps_pred;
run;

proc sort data = loc.temp1;
	by combination _imputation_ &site;
run;

****************************************************************************;

%let file = "&out_dir.\CDDC Urine Supp PS common support &date..rtf";
ods rtf file = &file style = styles.very_narrow startpage = NO BODYTITLE;

/*
ods graphics / reset imagefmt=jpg width=8in height=9in imagename="pairedbox" border=off;
  
proc sgpanel data = loc.temp1;
	panelby _imputation_ combination /NOVARNAME layout=lattice columns=5 rows=9;
	hbox ps_pred /category= &site;
	COLAXIS label ="Probability of seeking treatment";
	ROWAXIS label ="Site";
	title "Common support for each imputation combination (stage 2 by stage 1)";
run;
ods graphics / reset=all ;
*/

/* paired histograms */

ods graphics / reset width=8in height=9.5in imagename="pairedhist" border=off;
goptions htext= 6;
proc sgpanel data = loc.temp1;
	panelby _imputation_ combination /NOVARNAME layout=lattice columns=5 rows=9;
  	histogram ps_pred_vtc/  scale = percent legendlabel='VTC' fillattrs=graphdata1  transparency=0.3 BINWIDTH=.05;
  	histogram ps_pred_cddc /scale = percent  legendlabel='CDDC' fillattrs=graphdata2  transparency=0.3 BINWIDTH=.05;
  	COLAXIS label ="Probability of seeking treatment";
	ROWAXIS label ="Percent of sample";
	title "Figure S5. Common support for each imputation combination (stage 2 by stage 1).";
run;
ods graphics / reset=all ;

%let text = 
^5n Note: Paired histograms reflecting common support for various 
stage 2 imputations by stage 1 imputation model combinations.
Each column is a different stage 2 imputation (M = 5). Along the right 
vertical axis, the label reads CDDC imputation model + 
VTC impuation model.^2n;

ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}&text";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}VTC: Voluntary Treatment Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}CDDC: Compulsory Drug Detention Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SR: Stochastic regression imputation";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV90: Single value imputation of length of inpatient stay (90 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV30: Single value imputation of length of inpatient stay (30 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV14: Single value imputation of time of first outcome measurement (14 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV0: Single value imputation of time of first outcome measurement (0 days)";


ods rtf close;

****************************************************************************;
****************************************************************************;
*****************             Helper functions                 *************;
****************************************************************************;
****************************************************************************;


%MACRO std_diff_cts(data);

proc sort data = loc.&data;
	by combination _imputation_ _stat_ &site;
run;

data loc.&data.2;
	set loc.&data (where = (&site ~= . AND _STAT_ in("MEAN","STD")) 
					drop = _TYPE_);
	by combination _imputation_ _stat_ &site;

	array list &var1 &var3;

	do over list;
		temp = lag(list);
		if last._stat_ and _stat_ = "MEAN" 
			then list = abs(list - temp);
		else if last._stat_ and _stat_ = "STD" 
			then list = sqrt((list*list + temp*temp)/2);
	end;

	temp2 = lag(_FREQ_);
	if last._stat_;

	sample = "VTC:"|| _FREQ_ ||", CDDC:"||temp2;

	drop temp temp2 &site _freq_;

run;

data loc.&data.3;
	set loc.&data.2;
	by combination _imputation_ _stat_;

	array list &var1 &var3;

	do over list;
		temp = lag(list);
		if last._imputation_ then list = temp / list;
	end;

	if last._imputation_;

	drop _stat_ temp;

run;

data loc.&data.4;
	set loc.&data.3;
	by combination _imputation_;

	array list &var1 &var3;

	do over list;
		temp = lag(list);
		if first.combination then temp = 0;
		list = sum(temp,list);
		if last.combination then list = list/5;
	end;

	if last.combination;

	drop temp _imputation_;

run;

proc transpose data=loc.&data.4 (drop = combination)
               out=loc.&data.4t (drop = _label_);
run;

%MEND;


%MACRO std_diff_cat(data);


proc sort data = loc.&data;
	by combination _imputation_ _stat_ &site;
run;

data loc.&data.2;
	set loc.&data (where = (&site ~= . AND _STAT_ = "MEAN"));
	by combination _imputation_ _stat_ &site;

	array list &var2b &var2m;

	do over list;
		temp = lag(list);
		if last._stat_ 
			then list = abs(list - temp)/sqrt((list*(1-list)+temp*(1-temp))/2);
	end;

	temp2 = lag(_FREQ_);
	if last._stat_;

	sample = "VTC:"|| _FREQ_ ||", CDDC:"||temp2;

	drop temp temp2 &site _freq_ _TYPE_ _stat_;

run;

data loc.&data.3;
	set loc.&data.2;
	by combination _imputation_;

	array list &var2b &var2m;

	do over list;
		temp = lag(list);
		if first.combination then temp = 0;
		list = sum(temp,list);
		if last.combination then list = list/5;
	end;

	if last.combination;

	drop temp _imputation_ ;

run;

proc transpose data=loc.&data.3 (drop = combination)
               out=loc.&data.3t (drop = _label_);
run;

%MEND;


proc format;

value $name_ 
"dm0" = "Age"
"ih01_sqrt" = "Times imprisoned"
"ih02_log" = "Times in lockup/jail"
"ih05_totaltimes" = "Times detained in CDDC"
"du16_log" = "Age of first drug use" 
"du02b_heroin_years" = "Years of heroin use"
"ss_so" = "Social support (significant other)"
"ss_fam" = "Social support (family)"
"ss_fri" = "Social support (friends)"
"dm3_m" = "Completed secondary school"
"dm4_m" = "Married"
"dm5_m"  = "Previous housing type"
"rbd1" = "Ever injected drugs"
"daily_heroin" = "Daily use of heroin before entering facility"
"lifetime_alcohol" = "Alcohol use (lifetime)"
"lifetime_other_opiates" = "Non-heroin opiate use (lifetime)"
"lifetime_benzos" = "Benzodiazepine use (lifetime)"
"lifetime_stimulants" = "Stimulant use (lifetime)"
"dast_cat_m2" = "Drug use severity (moderate)"
"dast_cat_m3" = "Drug use severity (substantial/severe)"
"dm2_m1" = "Ethnicity (Malay)"
"dm2_m2" = "Ethnicity (Chinese)"
"cs01_craving" = "Opiate cravings"
"so_rec_d" = "Readiness for change (recognition)"
"so_amb_d" = "Readiness for change (ambivalence)"
"so_ts_d" = "Readiness for change (taking steps)";

run;

****************************************************************************;
****************************************************************************;
*****************  Comparing weighted covariate distributions  *************;
****************************************************************************;
****************************************************************************;

/* Evaluate balance by comparing weighted distributions of
individual continuous variables */

* Compute standardized differences for cts vars;
* First weigthed std diffs;

ods exclude all;
proc means data = loc.temp1 std mean;
	by combination _IMPUTATION_;
	class &site;
	var &var1 &var3;
	weight wt2;
	output out = loc.means_w;
run;
ods exclude none;

%std_diff_cts(data=means_w);

****************************************************************************;

* Now compute unweigthed std diffs (original sample);

ods exclude all;
proc means data = loc.temp1 std mean;
	by combination _IMPUTATION_;
	class &site;
	var &var1 &var3;
	output out = loc.means_u;
run;
ods exclude none;

%std_diff_cts(data=means_u);

****************************************************************************;

* Categorical variable balance;

* Recode vars with >2 levels;
data loc.temp1b;
	set loc.temp1;

	dast_cat_m2 = 0;
	dast_cat_m3 = 0;
	dm2_m1 = 0;
	dm2_m2 = 0;

		 if dast_cat_m = 2 then dast_cat_m2 = 1;
	else if dast_cat_m = 3 then dast_cat_m3 = 1;

	 	 if dm2_m = 1 then dm2_m1 = 1;
	else if dm2_m = 2 then dm2_m2 = 1;

		 if dm5_m = 1 then dm5_m = 0;
	else if dm5_m = 2 then dm5_m = 1;

run;

%let var2m = dast_cat_m2 dast_cat_m3 dm2_m1 dm2_m2;

****************************************************************************;

* First for weighted sample;

ods exclude all;
proc means data = loc.temp1b mean nmiss;
	by combination _IMPUTATION_;
	class &site;
	var &var2b &var2m &var4;
	weight wt2;
	output out = loc.prop_w;
run;
ods exclude none;

%std_diff_cat(data=prop_w);

****************************************************************************;

* Then for unweighted;
ods exclude all;
proc means data = loc.temp1b mean nmiss;
	by combination _IMPUTATION_;
	class &site;
	var &var2b &var2m &var4;
	output out = loc.prop_u;
run;
ods exclude none;

%std_diff_cat(data=prop_u);

****************************************************************************;
* Print results;

data loc.wt;
	set loc.prop_w3t loc.means_w4t;
run;

data loc.ut;
	set loc.prop_u3t loc.means_u4t;
run;

****************************************************************************;


%let file = "&out_dir.\CDDC Urine Supp PS std diffs &date..rtf";
ods rtf file = &file style = styles.very_narrow startpage = NO BODYTITLE;

Title "Table S22a. Standardized differences by impuation combination (original sample weighting).";
proc print data = loc.ut label noobs; 
	label _NAME_= "Variable";
	label COL1 ="SR+SR";
	label COL2 ="SV14+SR";
	label COL3 ="SV0+SR";
	label COL4 ="SR+SV90";
	label COL5 ="SV14+SV90";
	label COL6 ="SV0+SV90";
	label COL7 ="SR+SV30";
	label COL8 ="SV14+SV30";
	label COL9 ="SV0+SV30";
	format COL1-COL9 4.3 _NAME_ $name_.;
	var _NAME_/style(header data)={just=l};
	var COL1-COL9 /style(header data)={just=c};
run;
Title;

*Notes on vars > 0.10:
dm3_m (all)
rbd1 (all)
dm4_m (VTC = SR)
dast_cat_m2 (VTC = SR)
dast_cat_m3 (VTC = SR)
ss_fam (VTC = SV90)
cs01_craving (VTC = SV90)
so_rec_d (VTC = SV90)
so_ts_d (all) - SV90 > 0.25
dm0 (all);
ods startpage = NOW;

Title "Table S22b. Standardized differences by impuation combination (IPTW).";
proc print data = loc.wt label noobs; 
	label _NAME_= "Variable";
	label COL1 ="SR+SR";
	label COL2 ="SV14+SR";
	label COL3 ="SV0+SR";
	label COL4 ="SR+SV90";
	label COL5 ="SV14+SV90";
	label COL6 ="SV0+SV90";
	label COL7 ="SR+SV30";
	label COL8 ="SV14+SV30";
	label COL9 ="SV0+SV30";
	format COL1-COL9 4.3 _NAME_ $name_.;
	var _NAME_/style(header data)={just=l};
	var COL1-COL9 /style(header data)={just=c};
run;
Title;

%let text = 
Note: Standardized differences of key characteristics using the original weighting 
and the inverse probability of treatment weighting (IPTW) for various stage 1 
imputation model combinations. Along the top axis, 
the label reads CDDC imputation model + 
VTC impuation model.^2n;

ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}&text";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}VTC: Voluntary Treatment Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}CDDC: Compulsory Drug Detention Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SR: Stochastic regression imputation";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV90: Single value imputation of length of inpatient stay (90 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV30: Single value imputation of length of inpatient stay (30 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV14: Single value imputation of time of first outcome measurement (14 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV0: Single value imputation of time of first outcome measurement (0 days)";

ods rtf close;

* Notes:
* all below 0.25 threshold, however the following > 0.10:
dm0 (VTC = SV90)
cs01_craving (VTC = SV90)
so_ts_d (all)
lifetime_other_opiates (where VTC = SV90)
daily_heroin (where VTC = SV90)
lifetime_stimulants (where VTC = SV90)
dm2_dm2 (where VTC = SV90);

****************************************************************************;
****************************************************************************;
****************************************************************************;
%let labels = %nrstr(
Age$
Times imprisoned$
Times in lockup/jail$
Times detained in CDDC$
Age of first drug use$
Years of heroin use$
Social support (significant other)$
Social support (family)$
Social support (friends)$
Completed secondary school$
Opiate cravings$
Readiness for change (recognition)$
Readiness for change (ambivalence)$
Readiness for change (taking steps));

* Plot density plots with weights;

%MACRO EDCFS(data,w);

%local i;
%let i = 1;
%let vals = a b c d e f g h i j k l m n o p q r s t u v w x y z;

%do %while("%scan(&var1 &var3,&i)" NE "");

	%let val = %scan(&vals, &i);
	%let var = %scan(&var1 &var3,&i);
	%let lab = %scan(&labels,&i,$);

	ods exclude all;
	%if &w = 0 %then %do;
		%let title = Figure S$. ECDF - original sample weighting;
		proc univariate data=loc.&data;
			by _IMPUTATION_ combination &site;
			var &var;
			output out=loc.P pctlpre=P pctlpts= 0 to 100 by 1;
		run;
	%end;
	%else %do;
		%let title = Figure S$. ECDF - IPTW;
		proc univariate data=loc.&data;
			by _IMPUTATION_ combination &site;
			var &var;
			weight  wt2;
			output out=loc.P pctlpre=P pctlpts= 0 to 100 by 1;
		run;
	%end;
	ods exclude none;

	data loc.P2;
		set loc.P;
		by _IMPUTATION_ combination &site;
		retain combination _imputation_;

		array list [*] P0-P100;

		do count = 0 to 100;
			CDDC = lag(list{count+1]);
			VTC = list[count+1];
			if &site = 1 then output;
		end;

		if &site = 0 then delete;

		drop &site P0-P100;
	run;

	%let title = %sysfunc(translate(&title,&val,$));

	Title "&title (&lab)";
	proc sgpanel data = loc.P2;
		panelby _imputation_ combination /NOVARNAME layout=lattice 
		columns=5 rows=9;
	  	scatter x = count y = VTC/legendlabel='VTC';
	  	scatter x = count y = CDDC/legendlabel='CDDC';
	  	COLAXIS label ="Percentile";
		ROWAXIS label = "&lab";
	run;
	Title;

	%let i = %eval(&i + 1);

%end;

%MEND;

proc sort data = loc.temp1; by _IMPUTATION_ combination &site; run;


%let file = "&out_dir.\CDDC Urine Supp PS ECDFs &date..rtf";
ods rtf file = &file style = styles.very_narrow startpage = NO BODYTITLE;
ods graphics / reset width=8in height=9.5in imagename="pairedhist" border=off;

%let data = temp1;
%let w = 0;

%EDCFS(&data,&w);

%let w = 1;

%EDCFS(&data,&w);

%let text = 
^5n Note: Empirical cumulative distribution functions reflecting 
balance between groups on key characteristics for
stage 2 imputations by stage 1 imputation model combinations.
Each column is a different stage 2 imputation (M = 5). Along the right 
vertical axis, the label reads CDDC imputation model + 
VTC impuation model.^2n;

ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}&text";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}VTC: Voluntary Treatment Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}CDDC: Compulsory Drug Detention Center";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SR: Stochastic regression imputation";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV90: Single value imputation of length of inpatient stay (90 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV30: Single value imputation of length of inpatient stay (30 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV14: Single value imputation of time of first outcome measurement (14 days)";
ods text = "^S={LEFTMARGIN = 1in RIGHTMARGIN = 1in}SV0: Single value imputation of time of first outcome measurement (0 days)";


ods rtf close;


****************************************************************************;
****************************************************************************;
*********************   Remove old datasets               ******************;
****************************************************************************;
****************************************************************************;


proc datasets lib=loc nolist;       
	delete weights temp: prop_: mean: p p2 ut wt;   
run;
quit;


****************************************************************************;
****************************************************************************;
*****************                   End                        *************;
****************************************************************************;
****************************************************************************;

%MEND print_propensity;
